###Figures 5, 6 , 7 and 8

#and Table 7

library(foreign)
library(readstata13)
library(dplyr)
library(plyr)
## Work from the replication folder (machine-specific absolute path) and load
## the merged data set; the code below expects the workspace to provide `data`.
setwd("C:/Users/kmm7999/Dropbox/Social_Media_Lab/Data_Survey/YouGov_Data/apsr_replication/")

#setwd("C:/Users/kevin/Dropbox/Social_Media_Lab/Data_Survey/YouGov_Data/apsr_replication/")
load("merged_nr_soma.RData")



## Recode the Twitter frequency use variable to better reflect the underlying
## categories.
## Survey item: "Roughly, how many times per month do you go on Twitter?"
## Category codes 1..7 are replaced by the values 0, 1, 2, 7, 15, 30 and 90.
## Done in three steps: to character, map the codes, back to numeric.
data$twitter_freq_inc <- as.character(data$twitter_freq_inc)
data$twitter_freq_inc <- revalue(
  data$twitter_freq_inc,
  c("7" = "90", "6" = "30", "5" = " 15", "4" = "7", "3" = "2",
    "2" = "1", "1" = "0")
)
data$twitter_freq_inc <- as.numeric(data$twitter_freq_inc)

## Shift the overall tweet count by 1.0001 (per the original note: to avoid
## dividing by zero below), then store the reciprocal of its log.
## NOTE(review): despite its name, all_tweets_log holds 1 / log(count), and it
## is not referenced again in this script.
data$tweets_all_topics <- data$tweets_all_topics + 1.0001

data$all_tweets_log <- 1 / log(data$tweets_all_topics)

#########################################################################
##Loop over all models
#########################################################################



## Control variables appended to the right-hand side of every model formula.
controls <- c(
  "woman", "age", "lowerclass", "profile_education_age",
  "white_british", "married", "newsnight_freq", "religious",
  "internet_freq_inc", "newspaper_type"
)

## Party / issue / wave label vectors.
full_parties <- c("Labour", "UKIP", "LibDem", "Tories")
parties <- c("labour", "UKIP", "libdem", "conserv")
issues <- c("EU", "spending", "immigration")
waves <- c("w1", "w2", "w3", "w4")
t_parties <- c("labo", "ukip", "lide", "tory")

## Factual questions (qs) and the matching tweet-topic codes (t_issues).
## t_issues had to be changed to accommodate isis: its first entry is "isis".
## Both vectors are indexed in parallel by the model loop.
qs <- c("isis", "unemployment", "immigrants")
t_issues <- c("isis", "economy", "immigr")
t_waves <- c("p1", "p2", "p3", "p4")
pid <- c(2, 5, 3, 1)
tt_issues <- c("isis", "econ", "imm")

## Empty containers.
names <- list()

## One slot per question is filled by the model loop.
models_combo <- list()
models_lm <- list()
models_full <- list()
models_full_raw <- list()

## Per-source accumulators for means and standard errors (start empty).
labo_means <- labo_ses <- list()
ukip_means <- ukip_ses <- list()
lide_means <- lide_ses <- list()
tory_means <- tory_ses <- list()
right_media_means <- right_media_ses <- list()
cent_media_means <- cent_media_ses <- list()
left_media_means <- left_media_ses <- list()




## Here the outcome is whether or not they were correct in wave 3,
## the baseline is whether or not they were correct in wave 2
## We go from Wave 2 to Wave 3 because these are the only waves we asked these questions

## Return column `name` of `data`, stopping if no column has exactly that name.
## Replaces eval(parse(text = paste0("data$", ...))): the lookup is by exact
## name (no `$` partial matching) and a missing column is reported by name.
get_data_col <- function(name) {
  if (!name %in% colnames(data)) {
    stop("column not found in data: ", name, call. = FALSE)
  }
  data[[name]]
}

## For each factual question j, build the outcome, baseline and tweet-exposure
## variables as columns of `data`, then fit three logistic regressions:
##   models_full[[j]] / models_full_raw[[j]]  exposure = all relevant tweets
##   models_combo[[j]]                        exposure = media + parties
##   models_lm[[j]]                           exposure = each party and each
##                                            media group separately
## The working columns of `data` are overwritten on every iteration.
for (j in seq_along(qs)) {

  ## Outcome: correct answer in wave 3; baseline: correct answer in wave 2
  data$outcome <- abs(get_data_col(paste0("fact_", qs[j], "_w3_correct_dk0")))

  data$baseline <- abs(get_data_col(paste0("fact_", qs[j], "_w2_correct_dk0")))

  ## Relevant tweets from each party in the 3rd time period
  ## (sum of the p3_1 and p3_2 columns for topic t_issues[j])
  data$tweet_count_labo <-
    get_data_col(paste0("p3_1", "_", t_issues[j], "_labo")) +
    get_data_col(paste0("p3_2", "_", t_issues[j], "_labo"))

  data$tweet_count_lide <-
    get_data_col(paste0("p3_1", "_", t_issues[j], "_lide")) +
    get_data_col(paste0("p3_2", "_", t_issues[j], "_lide"))

  data$tweet_count_ukip <-
    get_data_col(paste0("p3_1", "_", t_issues[j], "_ukip")) +
    get_data_col(paste0("p3_2", "_", t_issues[j], "_ukip"))

  data$tweet_count_tory <-
    get_data_col(paste0("p3_1", "_", t_issues[j], "_tory")) +
    get_data_col(paste0("p3_2", "_", t_issues[j], "_tory"))

  ## Period-3 media tweets on the topic, by media group, raw and logged
  data$right_media <- get_data_col(paste0("p3_media_right_", t_issues[j]))
  data$right_media_log <- log(1.00001 + data$right_media)

  data$left_media <- get_data_col(paste0("p3_media_left_", t_issues[j]))
  data$left_media_log <- log(1.00001 + data$left_media)

  data$cent_media <- get_data_col(paste0("p3_media_cent_", t_issues[j]))
  data$cent_media_log <- log(1.00001 + data$cent_media)

  ## Aggregate exposure variables. These must be computed BEFORE the party
  ## counts are logged below, because they sum the raw counts.
  ## NOTE(review): the offset here is 1.0001, while the individual variables
  ## use 1.00001.
  data$parties <- log(1.0001 +
                        data$tweet_count_lide + data$tweet_count_labo +
                        data$tweet_count_ukip + data$tweet_count_tory)

  data$media <- log(1.0001 + data$cent_media + data$left_media + data$right_media)

  data$total <- log(1.0001 +
                      data$tweet_count_lide + data$tweet_count_labo +
                      data$tweet_count_ukip + data$tweet_count_tory +
                      data$cent_media + data$left_media + data$right_media)

  ## Log the party counts in place (the raw counts are no longer needed)
  data$tweet_count_tory <- log(1.00001 + data$tweet_count_tory)

  data$tweet_count_labo <- log(1.00001 + data$tweet_count_labo)

  data$tweet_count_lide <- log(1.00001 + data$tweet_count_lide)

  data$tweet_count_ukip <- log(1.00001 + data$tweet_count_ukip)


  ## Model specifications

  # full: fit once, keep the raw fit and derive its summary from it
  # (previously the identical model was estimated twice)
  models_full_raw[[j]] <- glm(paste0("outcome ~ baseline + total + twitter_freq_inc  +"
                                     , paste0(controls, collapse = "+")),
                              data = data, na.action = na.exclude, family = "binomial")

  models_full[[j]] <- summary(models_full_raw[[j]])

  # combo: media and party exposure entered separately
  models_combo[[j]] <- summary(glm(paste0("outcome ~ baseline + media + parties + twitter_freq_inc  +"
                                          , paste0(controls, collapse = "+")),
                                   data = data, na.action = na.exclude, family = "binomial"))

  # no interactions: one term per party and per media group
  models_lm[[j]] <- summary(glm(paste0("outcome ~ baseline + tweet_count_labo + tweet_count_ukip + tweet_count_lide + tweet_count_tory +  
                                     right_media_log + left_media_log + cent_media_log+ twitter_freq_inc  + "
                                       , paste0(controls, collapse = "+")),
                                data = data, na.action = na.exclude, family = "binomial"))

}

################graphing--all tweets

## Pull the coefficients needed for the fact figures out of the fitted models.
full_qs <- c("ISIS", "Unemployment", "Immigration")

## --- all tweets: coefficient on `total` in each of the three full models
names <- c("ISIS", "Unemployment", "Immigration")
index <- c(1, 2, 3)

## point estimates
means <- vapply(
  models_full[1:3],
  function(fit) fit$coefficients["total", "Estimate"],
  numeric(1)
)

## standard errors
ses <- vapply(
  models_full[1:3],
  function(fit) fit$coefficients["total", "Std. Error"],
  numeric(1)
)

## --- combined results: for every combo model append (parties, media), in that
## order, so the vectors read parties_1, media_1, parties_2, media_2, ...
all_means <- vector()
all_ses <- vector()

for (i in 1:length(models_combo)) {
  names <- c("Parties", "Media")
  index <- c(.1, .2)

  ## point estimates and standard errors, stripped of their row names
  means <- unname(models_combo[[i]]$coefficients[c("parties", "media"), "Estimate"])
  ses <- unname(models_combo[[i]]$coefficients[c("parties", "media"), "Std. Error"])

  all_means <- c(all_means, means)
  all_ses <- c(all_ses, ses)
}

## Keep the fact results under their own names; all_means / all_ses are
## reused further down the script.
all_means_fact <- all_means

all_ses_fact <- all_ses


######################################Run the same for the placements

## Reload the merged data so the placement models start from a fresh `data`;
## this discards the working columns the fact loop added.
## The working directory chosen at the top of the script is still in effect,
## so it is not set again here. (A second setwd() to another user's home
## directory would abort the run wherever that path does not exist.)
load("merged_nr_soma.RData")



## Recode the Twitter frequency use variable to better reflect the underlying categories
## Roughly, how many times per month do you go on Twitter?
## Category codes 1..7 are replaced by the values 0, 1, 2, 7, 15, 30 and 90.
data$twitter_freq_inc <- as.numeric(
  revalue(as.character(data$twitter_freq_inc),
          c("7" = "90", "6" = "30", "5" = " 15", "4" = "7", "3" = "2",
            "2" = "1", "1" = "0")))

## Shift the overall tweet count by 1.0001 (per the original note: to avoid
## dividing by zero), then store the reciprocal of its log.
## NOTE(review): despite its name, all_tweets_log holds 1 / log(count).

data$tweets_all_topics <- (1.0001 + data$tweets_all_topics)

data$all_tweets_log <- 1 / log(data$tweets_all_topics)

#########################################################################
##Loop over all models
#########################################################################


## Control variables appended to the right-hand side of every model formula.
controls <- c(
  "woman", "age", "lowerclass", "profile_education_age",
  "white_british", "married", "newsnight_freq", "religious",
  "internet_freq_inc", "newspaper_type"
)

## Party / issue / wave label vectors. `issues` sets the number of iterations
## of the placement loop and supplies the labels collected in `names`.
full_parties <- c("Labour", "UKIP", "LibDem", "Tories")
parties <- c("labour", "UKIP", "libdem", "conserv")
issues <- c("EU", "spending", "immigration")
waves <- c("w1", "w2", "w3", "w4")
t_parties <- c("labo", "ukip", "lide", "tory")

qs <- c("isis", "unemployment", "immigrants")
t_waves <- c("p1", "p2", "p3", "p4")
pid <- c(2, 5, 3, 1)
tt_issues <- c("isis", "econ", "imm")

## Issue codes used to build column names in the placement loop:
##   c_issues -> soft_correct_<issue>_w1 / _w4 (baseline / outcome)
##   t_issues -> party and media tweet-count columns
c_issues <- c("EU", "spend", "immigration")
t_issues <- c("eu", "economy", "immigr")

## initialize empty containers
names <- list()

models_full_raw <- list()
models_full <- list()
models_combo <- list()

## Per-source accumulators for means and standard errors (start empty).
labo_means <- labo_ses <- list()
ukip_means <- ukip_ses <- list()
lide_means <- lide_ses <- list()
tory_means <- tory_ses <- list()
right_media_means <- right_media_ses <- list()
cent_media_means <- cent_media_ses <- list()
left_media_means <- left_media_ses <- list()

## The loop appends min(data$total) per issue to `total` and stores the full
## data$total vector per issue in `total_relevant_tweets`.
total <- list()
total_relevant_tweets <- list()

n <- list()


## Return column `name` of `data`, stopping if no column has exactly that name.
## Replaces eval(parse(text = paste0("data$", ...))): the lookup is by exact
## name (no `$` partial matching) and a missing column is reported by name.
get_data_col <- function(name) {
  if (!name %in% colnames(data)) {
    stop("column not found in data: ", name, call. = FALSE)
  }
  data[[name]]
}

## Element-wise sum, taken left to right, of the `data` columns named in `cols`.
sum_data_cols <- function(cols) {
  Reduce(`+`, lapply(cols, get_data_col))
}

## Column-name prefixes that are summed for each source, in the original
## order of addition: party tweets come in two columns per period (_1, _2),
## media tweets in one column per period.
party_prefixes <- c("p3_1", "p3_2", "p2_1", "p2_2", "p4_1", "p4_2")
media_prefixes <- c("p3", "p2", "p4")

## For each issue j, build the outcome, baseline and tweet-exposure variables
## as columns of `data`, then fit two logistic regressions:
##   models_full[[j]] / models_full_raw[[j]]  exposure = all relevant tweets
##   models_combo[[j]]                        exposure = media + parties
## The working columns of `data` are overwritten on every iteration.
for (j in seq_along(issues)) {

  ## define outcome variables: soft_correct_<issue> in wave 4, with the
  ## wave 1 value as baseline
  names <- c(names, paste0(" ", issues[j]))
  data$outcome <- abs(get_data_col(paste0("soft_correct_", c_issues[j], "_w4")))

  data$baseline <- abs(get_data_col(paste0("soft_correct_", c_issues[j], "_w1")))

  ## Relevant tweets from each party over the final three time periods
  ## (p2, p3 and p4)
  data$tweet_count_labo <-
    sum_data_cols(paste0(party_prefixes, "_", t_issues[j], "_labo"))

  data$tweet_count_lide <-
    sum_data_cols(paste0(party_prefixes, "_", t_issues[j], "_lide"))

  data$tweet_count_ukip <-
    sum_data_cols(paste0(party_prefixes, "_", t_issues[j], "_ukip"))

  data$tweet_count_tory <-
    sum_data_cols(paste0(party_prefixes, "_", t_issues[j], "_tory"))

  ## Media tweets on the topic over the same periods, by media group,
  ## raw and logged
  data$right_media <-
    sum_data_cols(paste0(media_prefixes, "_media_right_", t_issues[j]))
  data$right_media_log <- log(1.00001 + data$right_media)

  data$left_media <-
    sum_data_cols(paste0(media_prefixes, "_media_left_", t_issues[j]))
  data$left_media_log <- log(1.00001 + data$left_media)

  data$cent_media <-
    sum_data_cols(paste0(media_prefixes, "_media_cent_", t_issues[j]))
  data$cent_media_log <- log(1.00001 + data$cent_media)

  ## Aggregate exposure variables. These must be computed BEFORE the party
  ## counts are logged below, because they sum the raw counts.
  ## NOTE(review): the offset here is 1.0001, while the individual variables
  ## use 1.00001.
  data$parties <- log(1.0001 +
                        data$tweet_count_lide + data$tweet_count_labo +
                        data$tweet_count_ukip + data$tweet_count_tory)

  data$media <- log(1.0001 + data$cent_media + data$left_media + data$right_media)

  data$total <- log(1.0001 +
                      data$tweet_count_lide + data$tweet_count_labo +
                      data$tweet_count_ukip + data$tweet_count_tory +
                      data$cent_media + data$left_media + data$right_media)

  ## Record the smallest logged total and keep the whole vector per issue
  total <- c(total, min(data$total, na.rm = TRUE))
  total_relevant_tweets[[j]] <- data$total

  ## Log the party counts in place (the raw counts are no longer needed)
  data$tweet_count_tory <- log(1.00001 + data$tweet_count_tory)

  data$tweet_count_labo <- log(1.00001 + data$tweet_count_labo)

  data$tweet_count_lide <- log(1.00001 + data$tweet_count_lide)

  data$tweet_count_ukip <- log(1.00001 + data$tweet_count_ukip)


  ## Model specifications

  # full: fit once, keep the raw fit and derive its summary from it
  # (previously the identical model was estimated twice)
  models_full_raw[[j]] <- glm(paste0("outcome ~ baseline + total + twitter_freq_inc  +"
                                     , paste0(controls, collapse = "+")),
                              data = data, na.action = na.exclude, family = "binomial")

  models_full[[j]] <- summary(models_full_raw[[j]])

  # combo: media and party exposure entered separately
  models_combo[[j]] <- summary(glm(paste0("outcome ~ baseline + media + parties + twitter_freq_inc  +"
                                          , paste0(controls, collapse = "+")),
                                   data = data, na.action = na.exclude, family = "binomial"))

}


################################################################## graphing---all tweets 
## Pull the coefficients needed for the placement figures out of the fitted
## models.
full_issues <- c("EU", "Spending", "Immigration")

## --- all tweets: coefficient on `total` in each of the three full models
names <- c("EU", "Spending", "Immigration")
index <- c(1, 2, 3)

## point estimates
means <- vapply(
  models_full[1:3],
  function(fit) fit$coefficients["total", "Estimate"],
  numeric(1)
)

## standard errors
ses <- vapply(
  models_full[1:3],
  function(fit) fit$coefficients["total", "Std. Error"],
  numeric(1)
)

## --- combined results: for every combo model append (parties, media), in that
## order, so the vectors read parties_1, media_1, parties_2, media_2, ...
all_means <- vector()
all_ses <- vector()

for (i in 1:length(models_combo)) {
  names <- c("Parties", "Media")
  index <- c(.1, .2)

  ## point estimates and standard errors, stripped of their row names
  means <- unname(models_combo[[i]]$coefficients[c("parties", "media"), "Estimate"])
  ses <- unname(models_combo[[i]]$coefficients[c("parties", "media"), "Std. Error"])

  all_means <- c(all_means, means)
  all_ses <- c(all_ses, ses)
}

## Keep the placement results under their own names.
all_means_placement <- all_means

all_ses_placement <- all_ses


##re-arrange to be media / party


## The fact and placement vectors alternate (parties, media) per model:
## positions 1, 3, 5 are the parties coefficients and 2, 4, 6 the media ones.
## Regroup them by source, fact models first, then placement models.
all_means_media <- c(all_means_fact[seq(2, 6, by = 2)],
                     all_means_placement[seq(2, 6, by = 2)])
all_ses_media <- c(all_ses_fact[seq(2, 6, by = 2)],
                   all_ses_placement[seq(2, 6, by = 2)])

all_means_parties <- c(all_means_fact[seq(1, 5, by = 2)],
                       all_means_placement[seq(1, 5, by = 2)])
all_ses_parties <- c(all_ses_fact[seq(1, 5, by = 2)],
                     all_ses_placement[seq(1, 5, by = 2)])



####################################

#Combine into one graph ---- media


##plot 
## Coefficient plot for tweets from media: one row per model, written to a
## 12 x 4 inch PDF under results/.
pdf("results/all_media_combo.pdf", width = 12, height = 4)
par(mar = c(4, 10, 4, 2))  # wide left margin for the row labels

## vertical positions of the six rows
index <- seq(.1, .6, length.out = 6)

## point estimates
plot(
  all_means_media, index,
  xlim = c(-.6, .6), ylim = c(.09, .61),
  ylab = "", yaxt = "n",
  xlab = "Logistic regression coefficients on number of relevant tweets",
  main = "Effects of Tweets From Media",
  pch = 3
)

## row labels: three fact models followed by three placement models
names <- c("Facts: ISIS", "Facts: Unemployment", "Facts: Immigration",
           "Placement: EU", "Placement: Spending", "Placement: Immigration")
axis(2, at = index, labels = names, las = 2)
abline(v = 0)  # reference line at zero effect

## tick marks at +/- 1.64 standard errors
points(all_means_media + 1.64 * all_ses_media, index, pch = "|")
points(all_means_media - 1.64 * all_ses_media, index, pch = "|")

## horizontal whiskers spanning +/- 1.96 standard errors, all rows at once
segments(
  x0 = all_means_media - 1.96 * all_ses_media, y0 = index,
  x1 = all_means_media + 1.96 * all_ses_media, y1 = index
)

dev.off()


####################################

#Combine into one graph ---- parties


##plot 
## Coefficient plot for tweets from parties: one row per model, written to a
## 12 x 4 inch PDF under results/.
pdf("results/all_party_combo.pdf", width = 12, height = 4)
par(mar = c(4, 10, 4, 2))  # wide left margin for the row labels

## vertical positions of the six rows
index <- seq(.1, .6, length.out = 6)

## point estimates
plot(
  all_means_parties, index,
  xlim = c(-.6, .6), ylim = c(.09, .61),
  ylab = "", yaxt = "n",
  xlab = "Logistic regression coefficients on number of relevant tweets",
  main = "Effects of Tweets From Parties",
  pch = 3
)

## row labels: three fact models followed by three placement models
names <- c("Facts: ISIS", "Facts: Unemployment", "Facts: Immigration",
           "Placement: EU", "Placement: Spending", "Placement: Immigration")
axis(2, at = index, labels = names, las = 2)
abline(v = 0)  # reference line at zero effect

## tick marks at +/- 1.64 standard errors
points(all_means_parties + 1.64 * all_ses_parties, index, pch = "|")
points(all_means_parties - 1.64 * all_ses_parties, index, pch = "|")

## horizontal whiskers spanning +/- 1.96 standard errors, all rows at once
segments(
  x0 = all_means_parties - 1.96 * all_ses_parties, y0 = index,
  x1 = all_means_parties + 1.96 * all_ses_parties, y1 = index
)

dev.off()